Short Report of Spectronaut output:
Load the data from the Spectronaut pipeline-mode output:
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1.9000 ✔ purrr 0.2.4
## ✔ tibble 1.4.2 ✔ dplyr 0.7.4
## ✔ tidyr 0.8.0 ✔ stringr 1.3.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Switch the session's working directory to the Spectronaut export folder;
# the relative file name below (and any later relative paths) resolve against it.
setwd("~/Desktop/Projects/2018_Laura_HBE_cell_infection_Saureus_side_analysis_SN/20180327_145604_TW_Saureus_HBE_iRT_percentile0-25_SN_normalization")
# Read the full report into `d`. The file carries an .xls extension but is
# parsed here as tab-delimited text with a header row.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
d <- read_delim(
  "Report_full Report complex (default) (Normal).xls",
  delim = "\t",
  col_names = TRUE
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## R.Condition = col_character(),
## R.FileName = col_character(),
## R.Fraction = col_character(),
## R.Label = col_character(),
## R.Replicate = col_integer(),
## PG.Organisms = col_character(),
## PG.ProteinAccessions = col_character(),
## PG.ProteinGroups = col_character(),
## PG.RunEvidenceCount = col_integer(),
## PEP.GroupingKey = col_character(),
## PEP.GroupingKeyType = col_character(),
## PEP.IsProteotypic = col_character(),
## PEP.NrOfMissedCleavages = col_integer(),
## PEP.StrippedSequence = col_character(),
## PEP.Rank = col_integer(),
## PEP.RunEvidenceCount = col_integer(),
## PEP.UsedForProteinGroupQuantity = col_character(),
## EG.IsDecoy = col_character(),
## EG.Label = col_character(),
## EG.Library = col_character()
## # ... with 19 more columns
## )
## See spec(...) for full column specifications.
# Print the column names of the imported report.
names(d)
## [1] "R.Condition"
## [2] "R.FileName"
## [3] "R.Fraction"
## [4] "R.Label"
## [5] "R.Replicate"
## [6] "PG.Organisms"
## [7] "PG.ProteinAccessions"
## [8] "PG.ProteinGroups"
## [9] "PG.Cscore"
## [10] "PG.Qvalue"
## [11] "PG.RunEvidenceCount"
## [12] "PEP.GroupingKey"
## [13] "PEP.GroupingKeyType"
## [14] "PEP.IsProteotypic"
## [15] "PEP.NrOfMissedCleavages"
## [16] "PEP.StrippedSequence"
## [17] "PEP.Rank"
## [18] "PEP.RunEvidenceCount"
## [19] "PEP.Quantity"
## [20] "PEP.UsedForProteinGroupQuantity"
## [21] "EG.iRTPredicted"
## [22] "EG.IsDecoy"
## [23] "EG.Label"
## [24] "EG.Library"
## [25] "EG.ModifiedPeptide"
## [26] "EG.ModifiedSequence"
## [27] "EG.PrecursorId"
## [28] "EG.UserGroup"
## [29] "EG.Workflow"
## [30] "EG.Identified"
## [31] "EG.IsUserPeak"
## [32] "EG.IsVerified"
## [33] "EG.PEP"
## [34] "EG.Qvalue"
## [35] "EG.Svalue"
## [36] "EG.ApexRT"
## [37] "EG.DatapointsPerPeak"
## [38] "EG.DeltaiRT"
## [39] "EG.DeltaRT"
## [40] "EG.iRTEmpirical"
## [41] "EG.MeanApexRT"
## [42] "EG.MeanTailingFactor"
## [43] "EG.RTPredicted"
## [44] "EG.SignalToNoise"
## [45] "EG.AvgProfileQvalue"
## [46] "EG.MaxProfileQvalue"
## [47] "EG.MinProfileQvalue"
## [48] "EG.PercentileQvalue"
## [49] "EG.NormalizationFactor"
## [50] "EG.ReferenceQuantity (Settings)"
## [51] "EG.TargetQuantity (Settings)"
## [52] "EG.TargetReferenceRatio (Settings)"
## [53] "EG.TotalQuantity (Settings)"
## [54] "EG.UsedForPeptideQuantity"
## [55] "EG.UsedForProteinGroupQuantity"
## [56] "EG.Cscore"
## [57] "EG.IntCorrScore"
## [58] "EG.Noise"
## [59] "FG.Charge"
## [60] "FG.FragmentCount"
## [61] "FG.Id"
## [62] "FG.IsotopeLabelType"
## [63] "FG.Label"
## [64] "FG.PrecMz"
## [65] "FG.PrecMzCalibrated"
## [66] "FG.Reference"
## [67] "FG.SyntheticIsotopeGroupLabel"
## [68] "FG.FWHM"
## [69] "FG.MeanApexRT"
## [70] "FG.MeanTailingFactor"
## [71] "FG.PrecWindowNumber"
## [72] "FG.PrecursorSignalToNoise"
## [73] "FG.SignalToNoise"
## [74] "FG.ShapeQualityScore"
## [75] "FG.MS1PeakArea"
## [76] "FG.NormalizedMS1PeakArea"
## [77] "FG.MS2PeakArea"
## [78] "FG.NormalizedMS2PeakArea"
## [79] "FG.HasPossibleInterference (MS1)"
## [80] "FG.HasPossibleInterference (MS2)"
## [81] "FG.Quantity"
## [82] "FG.Noise"
Filter the Spectronaut output report for ions with a Q-value <= 0.001 and count the distinct ions per file and condition.
# Keep only rows with EG.Qvalue of at most 0.001, then count the number of
# distinct ions (EG.Label) for every combination of run file and condition.
psm.distinct <- d %>%
  filter(EG.Qvalue <= 0.001) %>%
  group_by(R.FileName, R.Condition) %>%
  summarise(distinct_ions = n_distinct(EG.Label))
## Warning: package 'bindrcpp' was built under R version 3.4.4

Normalize the data; three variants of the intensities are compared:
- Raw data (already in report)
- Data normalized in Spectronaut with local normalization over complete profiles (already in report)
- Global normalization using the median
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 96 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 96 rows containing non-finite values (stat_boxplot).

Calculate the coefficient of variation (CV) of each ion within each condition for the raw, Spectronaut-normalized, and median-normalized data.
# Coefficient of variation (sd / mean, ignoring NA) of each ion (EG.Label)
# within each condition, computed separately for three intensity columns.
# NOTE(review): FG.MS2PeakArea.median.norm is not among the 82 columns printed
# for the imported report; presumably it is added by a normalization step not
# shown here -- confirm it exists in `d` before this runs.
cv.all <- d %>%
  group_by(R.Condition, EG.Label) %>%
  summarise(
    CV_Spectronaut_norm = sd(FG.NormalizedMS2PeakArea, na.rm = TRUE) /
      mean(FG.NormalizedMS2PeakArea, na.rm = TRUE),
    CV_raw_data = sd(FG.MS2PeakArea, na.rm = TRUE) /
      mean(FG.MS2PeakArea, na.rm = TRUE),
    CV_median_norm = sd(FG.MS2PeakArea.median.norm, na.rm = TRUE) /
      mean(FG.MS2PeakArea.median.norm, na.rm = TRUE)
  )

# Reshape to long format: one row per ion, condition and normalization method.
# gather() is kept because the session attaches tidyr 0.8.0, which predates
# pivot_longer().
cv.plot <- gather(
  cv.all,
  "CV_raw_data", "CV_Spectronaut_norm", "CV_median_norm",
  key = "normalization_method",
  value = "CV"
)

# Fix the factor level order: raw, Spectronaut-normalized, median-normalized.
cv.plot$normalization_method <- factor(
  cv.plot$normalization_method,
  levels = c("CV_raw_data", "CV_Spectronaut_norm", "CV_median_norm")
)
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
